options(scipen = 999)


library(tidyverse)
library(ggthemes)
library(lubridate)
library(maps)
library(usmap)
library(ggridges)
library(viridis)
library(zoo)
library(arm)
library(lfe)
library(ggalt)



city.data <- read.csv('Data/city_data_minimal.csv')
ois.min <- read.csv('Data/OIS_Dataset_Minimal.csv')
load("Data/lemas/2020/38651-0001-Data.rda")
lemas.2020 <- da38651.0001
rm('da38651.0001')
nibrs.counts <- read.csv('Data/other_data/offenderCount20102021.csv')
load(file='Data/other_data/oisWithCensusTracts.RData')
load(file='Data/other_data/OnlyOurTractsKeyVariables.RData')
census.race.data <- read.csv('Data/other_data/placeLevelBlackPercent20092019.csv')
load('Data/other_data/tractMappingFull(Clean).RData')
load('Data/other_data/codDataOurCitiesCounts.RData')

ois.min$Year[ois.min$Year==2103] <- 2003
ois.loc.tracts$Year[ois.loc.tracts$Year==2103] <- 2003
ois.min <- ois.min %>% filter(Hit !='Miss')




city.data$first.year <- rep(NA,dim(city.data)[1])
city.data$last.year <- rep(NA,dim(city.data)[1])
for(i in 1:nrow(city.data)){
  if(grepl('-',city.data$Range.Provided.By.City[i])){
    city.data$first.year[i] <- unlist(strsplit(as.character(city.data$Range.Provided.By.City[i]),'-',fixed=TRUE))[1]
    city.data$last.year[i] <- unlist(strsplit(as.character(city.data$Range.Provided.By.City[i]),'-',fixed=TRUE))[2]
  }
}
city.data$totalYears <- ifelse(!is.na(city.data$first.year), as.numeric(city.data$last.year)-as.numeric(city.data$first.year)+1,0)



tractsWithCountsAndPercentiles <- ourTractsWithVars %>%
  left_join(tractMapping, by=c('GEOID'='tractId')) %>%  # merge in cross-walk identifiers
  group_by(GEOID_155) %>%   # group by place
  mutate(perc.black = ecdf(black.alone.in.comb)(black.alone.in.comb),  # calculate empirical
         perc.asian = ecdf(asian.alone)(asian.alone),                  # distribtion and percentile
         perc.white = ecdf(white.alone)(white.alone),                  # within cities for
         perc.hisp = ecdf(hispanic)(hispanic),                         # each of our key variables
         poverty = ecdf(poverty.count)(poverty.count),
         renter = ecdf(renter.count)(renter.count),
         single.parent = ecdf(single.mothers+single.fathers)(single.mothers+single.fathers),
         single.mom = ecdf(single.mothers)(single.mothers),
         single.dad = ecdf(single.fathers)(single.fathers)) %>%
  left_join(ois.loc.tracts %>% group_by(GEOID,Year) %>%                # merge in shooting data
              summarize(n.shot = n()), by=c('GEOID'='GEOID','year'='Year')) %>%
  mutate(n.shot = ifelse(is.na(n.shot),0,n.shot)) %>%                  # fill in zeroes for no shooting tract-years
  mutate(geo.short = substr(GEOID_160,10,16)) %>%                      # get fips+tract id]
  left_join(city.data %>%                                              # merge in city data
              mutate(geo.short= substr(GEOID,8,14)) %>%                # create fips+tract id for merge
              dplyr::select(geo.short,first.year,last.year), by='geo.short') %>%
  filter(year >= first.year & year<=last.year)                         # discard tract-years not covered by data

countOfTracts <- tractsWithCountsAndPercentiles %>%
  group_by(GEOID_155, year) %>%
  summarize(n.tracts = n()) %>%
  ungroup() %>%
  group_by(GEOID_155) %>%
  summarize(avg.tracts = mean(n.tracts)) 





offense_group_counts_wide <- offense_group_counts %>%
  pivot_wider(
    names_from = offense_group,
    values_from = n.crimes)
offense_group_counts_wide$total.crimes <- rowSums(offense_group_counts_wide[,4:36], na.rm=TRUE)
offense_group_counts_wide[is.na(offense_group_counts_wide)] <- 0

# merge tract data with crime data, 
# keeping only tracts for which we have crime data  
tractDataWithCrime <- tractsWithCountsAndPercentiles %>%
  left_join(offense_group_counts_wide, by=c('GEOID'='census_tract', 'year'='year')) %>%
  filter(!is.na(total.crimes)) %>%
  mutate(city.id = substr(GEOID_155,1,16))


## calculate violent crime
##

tractDataWithCrime$violent.crime <-
  tractDataWithCrime$arson +
  tractDataWithCrime$`assault offenses` +
  tractDataWithCrime$`homicide offenses` +
  tractDataWithCrime$`human trafficking` +
  tractDataWithCrime$`kidnapping/abduction` +
  tractDataWithCrime$`robbery` +
  tractDataWithCrime$`sex offenses` +
  tractDataWithCrime$`weapon law violations`

tractDataWithCrime <- tractDataWithCrime %>%
  mutate(viol.crime.perc = ecdf(violent.crime)(violent.crime))

chi.tract.dats <- tractDataWithCrime %>% filter(city_name=='Chicago') %>%
  group_by(GEOID,NAME) %>%
  summarise(totalShot = sum(n.shot),
            poverty = mean(poverty),
            perc.black=mean(perc.black),
            perc.white=mean(perc.white),
            viol.crime=mean(violent.crime),
            viol.crime.perc=mean(viol.crime.perc))

high.viol.chi <- chi.tract.dats %>% filter(viol.crime.perc>.8)



##############################################
##############################################
##
##  Figure 5.2
##
##############################################
##############################################

pdf('FinalFigures/fig5-2.pdf',5,4)
tractDataWithCrime %>%
  ggplot(aes(x=violent.crime,y=n.shot)) +
  geom_smooth(aes(x=violent.crime,y=n.shot), color='black') +
  xlim(0,2500) +
  theme_tufte() +
  theme(legend.position = 'top') +
  labs(title='Violent Crime and Police Shootings',
       x = 'Total violent crime',
       y='Yearly police shootings') +
  theme(axis.title.x = element_text(), 
        axis.line=element_line())
dev.off()



##############################################
##############################################
##
##  Figrue 5.3
##
##############################################
##############################################


pdf('FinalFigures/fig5-3a.pdf',5,4)
ggplot(tractsWithCountsAndPercentiles) +
  #geom_point(aes(y=n.shot,x=perc.black)) +
  geom_smooth(aes(y=n.shot,x=poverty), color='black') +
  theme_tufte() +
  theme(axis.title.x = element_text(),
        axis.line=element_line()) +
  labs(title='Poverty Rates and Police Shootings',
       x = 'City-Specific Percentile Poverty Rate',
       y='Total Police Shootings') +
  theme(plot.caption = element_text(hjust = 0)) +
  coord_cartesian(ylim=c(0.0,0.05))
dev.off()

pdf('FinalFigures/fig5-3b.pdf',5,4)
ggplot(tractsWithCountsAndPercentiles) +
  #geom_point(aes(y=n.shot,x=perc.black)) +
  geom_smooth(aes(y=n.shot,x=renter), color='black') +
  theme_tufte() +
  theme(axis.title.x = element_text(),
        axis.line=element_line()) +
  labs(title='Rentership and Police Shootings',
       x = 'City-Specific Percentile Renter',
       y='Total Police Shootings') +
  theme(plot.caption = element_text(hjust = 0)) +
  coord_cartesian(ylim=c(0.0,0.05))
dev.off()

pdf('FinalFigures/fig5-3c.pdf',5,4)
ggplot(tractsWithCountsAndPercentiles) +
  #geom_point(aes(y=n.shot,x=perc.black)) +
  geom_smooth(aes(y=n.shot,x=single.mom), color='black') +
  theme_tufte() +
  theme(axis.title.x = element_text(),
        axis.line=element_line()) +
  labs(title='Single Mothers and Police Shootings',
       x = 'City-Specific Percentile Single Mothers',
       y='Total Police Shootings') +
  theme(plot.caption = element_text(hjust = 0)) +
  coord_cartesian(ylim=c(0.0,0.05))
dev.off()


##############################################
##############################################
##
##  Figure 5.4
##
##############################################
##############################################

pdf('FinalFigures/fig5-4a.pdf',5,4)
ggplot(tractsWithCountsAndPercentiles) +
  #geom_point(aes(y=n.shot,x=perc.black)) +
  geom_smooth(aes(y=n.shot,x=perc.black), color='black') +
  theme_tufte() +
  theme(axis.title.x = element_text(),
        axis.line=element_line()) +
  labs(
    x = 'City-Specific Percentile Black Residents',
    y='Total police shootings') +
  theme(plot.caption = element_text(hjust = 0)) +
  coord_cartesian(ylim=c(0.0,0.05))
dev.off()

pdf('FinalFigures/fig5-4b.pdf',5,4)
ggplot(tractsWithCountsAndPercentiles) +
  #geom_point(aes(y=n.shot,x=perc.black)) +
  geom_smooth(aes(y=n.shot,x=perc.hisp), color='black') +
  theme_tufte() +
  theme(axis.title.x = element_text(),
        axis.line=element_line()) +
  labs(
    x = 'City-Specific Percentile Hispanic Residents',
    y='Total police shootings') +
  theme(plot.caption = element_text(hjust = 0)) +
  coord_cartesian(ylim=c(0.0,0.05))
dev.off()


pdf('FinalFigures/fig5-4c.pdf',5,4)
ggplot(tractsWithCountsAndPercentiles) +
  #geom_point(aes(y=n.shot,x=perc.black)) +
  geom_smooth(aes(y=n.shot,x=perc.asian), color='black') +
  theme_tufte() +
  theme(axis.title.x = element_text(),
        axis.line=element_line()) +
  labs(
    x = 'City-Specific Percentile Asian Residents',
    y='Total police shootings') +
  theme(plot.caption = element_text(hjust = 0)) +
  coord_cartesian(ylim=c(0.0,0.05))
dev.off()

pdf('FinalFigures/fig5-4d.pdf',5,4)
ggplot(tractsWithCountsAndPercentiles) +
  #geom_point(aes(y=n.shot,x=perc.black)) +
  geom_smooth(aes(y=n.shot,x=perc.white), color='black') +
  theme_tufte() +
  theme(axis.title.x = element_text(),
        axis.line=element_line()) +
  labs(
    x = 'City-Specific Percentile White Residents',
    y='Total police shootings') +
  theme(plot.caption = element_text(hjust = 0)) +
  coord_cartesian(ylim=c(0.0,0.05))
dev.off()




##############################################
##############################################
##
##  Figure 5.5
##
##############################################
##############################################


pdf('FinalFigures/fig5-5a.pdf',6,3)
tractDataWithCrime %>%
  mutate(poverty.perc.group = cut(poverty, breaks=c(0,.33,.66,1))) %>%
  ggplot(aes(x=violent.crime,y=n.shot)) +
  stat_smooth(aes(group=poverty.perc.group), colour='black',
              method="loess") +
  scale_y_continuous(expand=c(0,0), limits=c(-50,100)) +
  coord_cartesian(ylim=c(-.01,.25)) +
  xlim(0,2500) +
  theme_tufte() +
  theme(axis.title.x = element_text(),
        axis.line=element_line()) +
  facet_wrap(~poverty.perc.group, ncol = 3, 
             labeller = as_labeller(c('(0,0.33]'='Bottom Third Poverty',
                                      '(0.33,0.66]'='Middle Third Poverty',
                                      '(0.66,1]'='Top Third Poverty'))) +
  theme(legend.position = 'top') +
  labs(title='Violent Crime and Police Shootings',
       x = 'Total violent crime',
       y='Total police shootings') +
  theme(axis.title.x = element_text(), 
        axis.line=element_line())
dev.off()


pdf('FinalFigures/fig5-5b.pdf',6,3)
tractDataWithCrime %>%
  mutate(black.perc.group = cut(perc.black, breaks=c(0,.33,.66,1))) %>%
  ggplot(aes(x=violent.crime,y=n.shot)) +
  stat_smooth(aes(group=black.perc.group), colour='black',
              method="loess") +
  scale_y_continuous(expand=c(0,0), limits=c(-50,100)) +
  coord_cartesian(ylim=c(-.01,.25)) +
  xlim(0,2500) +
  theme_tufte() +
  theme(axis.title.x = element_text(),
        axis.line=element_line()) +
  facet_wrap(~black.perc.group, ncol = 3, 
             labeller = as_labeller(c('(0,0.33]'='Bottom Third Black Pop.',
                                      '(0.33,0.66]'='Middle Third Black Pop.',
                                      '(0.66,1]'='Top Third Black Pop.'))) +
  theme(legend.position = 'top') +
  labs(title=' ',
       x = 'Total violent crime',
       y='Total police shootings') +
  theme(axis.title.x = element_text(), 
        axis.line=element_line())
dev.off()




##############################################
##############################################
##
##  Figure 5.6
##
##############################################
##############################################

#run the model

ch5.model <- felm(n.shot~rescale(perc.black)*rescale(violent.crime)+
                    rescale(perc.asian)*rescale(violent.crime)+
                    rescale(perc.hisp)*rescale(violent.crime)+
                    rescale(renter)+
                    rescale(poverty)+
                    rescale(single.mom)|city.id+year, data=tractDataWithCrime)

#create the figure
pdf('FinalFigures/fig5-6.pdf',5,3)
ggplot() +
  geom_label(aes(x = .6, y = .075, label = "75th percentile\nBlack population"), 
             hjust = 0, 
             vjust = 0.5, 
             lineheight = 0.8,
             colour = "#555555", 
             fill = "white", 
             label.size = NA, 
             family="Helvetica", 
             size = 3) +
  geom_label(aes(x = 1.8, y = .055, label = "25th percentile\nBlack population"), 
             hjust = 0, 
             vjust = 0.5, 
             lineheight = 0.8,
             colour = "#555555", 
             fill = "white", 
             label.size = NA, 
             family="Helvetica", 
             size = 3) +
  geom_abline(intercept =.014+coef(ch5.model)[1]*.25, slope=(coef(ch5.model)[2]+coef(ch5.model)[8]*.25), colour='grey', lwd=2) +
  geom_abline(intercept =.014+coef(ch5.model)[1]*.75, slope=((coef(ch5.model)[2]+coef(ch5.model)[8])*.75), colour='black', lwd=2) +
  ylim(0,.1) + 
  xlim(-1,2.5)+ # there just aren't many observations that low in crime
  theme_tufte() +
  theme(axis.title.x = element_text(),
        axis.line=element_line()) +
  labs(title='Violent Crime and Predicted Police Shootings',
       x = 'Annual Violent Crime',
       y='Predicted police shootings')
dev.off()